*WIC co-op eWIC new diff in diff analysis
*last modified: 13 May 2025
*last modified by: Charlotte Ambrozek
*this do file implements the csdid2 package to estimate individual 2x2 doubly robust did estimates and aggregate to obtain an att 

clear all
set more off
set rmsg on

set maxvar 120000
set emptycells drop

local data_dir ./data/cleaned
local raw_dir ./data/raw
local out_dir ./analysis/output
local graph_dir ./analysis/output/graphs
local tab_dir ./analysis/output/tables
local log_dir ./documentation/logs
local date: display %tdYY-NN-DD date(c(current_date), "DMY")
di "`date'"
capture log close

log using `log_dir'/ewic_csdid_zip`date', replace

*import cleaned eWIC implementation info into own frame
*eWIC implementation variable is at county fips/fiscal year level 
*partition data into state/treatment year groups

*import cleaned ewic implementation info into own frame
mkf ewic_fy
frame ewic_fy{
	use ./data/cleaned/ewic_rollout.dta, clear
	rename year fiscalyear
	drop pre* post* 
}

mkf income
frame income{
	use ./data/cleaned/zipyearmedianincome.dta, clear
	keep if year == 2011
	drop year
	destring zip, replace
}

cwf ewic_fy

*import two separate cleaned redemptions datasets (second is balanced to only include ZIPs that have non-missing redemptions in all years)
mkf red_fy
frame red_fy{
	use ./data/cleaned/tip_wic_redemptions_05_18, clear
	rename (wic_redemptions) (wicred)
}

mkf red_bal_fy
*load balanced redemptions as an attempted fix
frame red_bal_fy{
	use ./data/cleaned/tip_wic_bal_redemptions_05_18, clear
	rename (wic_redemptions) (wicred)
}

*import TIP store data into own frame
mkf tip_fy
frame tip_fy{
	local data_dir ./data/cleaned
	use `data_dir'/tip_auth_sq, clear
	*drop "Direct Distribution Center" and "Home Food Delivery Contractor" types
	gen f = (vendor_type1 == 3 & (vendor_type2 == 4 | missing(vendor_type2))) | (vendor_type1 == 4 & (vendor_type2 == 3 | missing(vendor_type2)))
	bys tip_id: egen flag = mode(f)
	bys tip_id: replace vendor_type1 = vendor_type1[_n+1] if flag == 0 & f == 1
	drop if flag == 1
	drop flag f tip_year_id
	*fill in gaps in state fips using modes
	*get st_fips
	generate str_fips = string(ct_fips)
	replace str_fips = "0" + str_fips if strlen(str_fips) == 4
	generate st_fips = substr(str_fips, 1, 2)
	destring st_fips, replace 
	bys st_fips: egen state_mode = mode(state)
	replace state = state_mode if state != state_mode & !missing(state_mode)
	drop str_fips state_mode

	*drop Vermont, Mississippi, + missing st_fips
	drop if st_fips == 50 | st_fips == 32 | st_fips == 28 | missing(st_fips)
	
	*link in low income/high poverty info
	frlink m:1 zip, frame(income) generate(inc_link)
	frget lowinc highpov, from(inc_link)

	*link ewic implementation info
	frlink m:1 ct_fips fiscalyear, frame(ewic_fy) generate(ewic_link)
	frget ev_year , from(ewic_link)
	* assume treated at earliest exposure
	bys tip_id: egen ey_min = min(ev_year)

	* assume treated at earliest exposure
	replace ev_year = ey_min if ev_year != ey_min & !missing(ey_min)
	bys tip_id (ev_year): assert ev_year[1] == ev_year[_N]

	*assert no dups
	duplicates report tip_id fiscalyear 
	assert r(N) == r(unique_value)
	
	*this zip has multiple implementation dates and can't be reconciled
	drop if zip == 42223
	
	*put fields necessary to create a frame of unique zip/year obs with ewic implementation; later will merge this to tip_fy again and then collapse to get aggregated zip level outcomes
	compress
	frame put zip fiscalyear ev_year state st_fips lowinc highpov, into(zip_sq_frame)
	*put all obs into frame to use for zip collapsing
	frame put _all, into(zip_sq)
}

frame zip_sq_frame{
	duplicates drop 
	
	bys zip: assert state[1] == state[_N]
	bys zip: assert st_fips[1] == st_fips[_N]
	*resolve discrepancies in values of ewic implementation variables as follows:
	*ev_year is equal to the min of ev_year across the zip/fiscal year (use the earliest possible event year)
	bys zip: egen ey_min = min(ev_year)
	* assume treated at earliest exposure
	replace ev_year = ey_min if ev_year != ey_min & !missing(ey_min)
	bys zip: assert ev_year[1] == ev_year[_N]

	duplicates drop

	duplicates report zip fiscalyear 
	assert r(N) == r(unique_value)	
	compress
}

*collapse to zip/fiscalyear level and then link ewic policy variables back in
frame zip_sq{
	*reshape by chain status to allow looking at chain status as heterogeneity later; may also want to do this for a50 or vendor type at some point
	collapse (sum) auth (first) state st_fips lowinc highpov, by(zip fiscalyear chain)
	gen cstring = ""
	replace cstring = "chain" if chain == 1
	replace cstring = "indep" if chain == 0
	drop chain
	reshape wide auth, i(zip fiscalyear lowinc highpov) j(cstring) string
	assert !missing(zip) & !missing(fiscalyear)
	egen auth = rowtotal(auth*)
	recode authindep authchain (missing = 0)
	*start linking in other policy variables and outcomes (redemptions)
	frlink 1:1 zip fiscalyear, frame(zip_sq_frame) generate(ewic_link)
	frlink 1:1 zip fiscalyear, frame(red_fy) generate(red_link)
	frlink 1:1 zip fiscalyear, frame(red_bal_fy) generate(red_bal_link)
	frget ev_year , from(ewic_link)
	frget wicred, from(red_link)
	frget wicbalred = wicred, from(red_bal_link)
	compress
	assert !missing(ev_year) & !missing(fiscalyear) & !missing(zip)
	xtset zip fiscalyear
	compress
	*log 
	foreach var of varlist auth authindep authchain {
		format `var' %11.6f
		local r = abs(rnormal(0, 0.000001))
		tempvar zero
		gen `zero' = `var' == 0
		replace `var' = `r' if `zero' == 1
		gen double l`var' = log(`var')
		*check that log conversion using errors worked
		assert !missing(l`var') if !missing(`var')
		replace `var' = 0 if `zero' == 1
 	}
	foreach var of varlist wicred wicbalred{
		gen double l`var' = log(`var')
		bys zip: egen std`var' = std(`var')
		gen double `var'hk = `var'/100000
	}
	*standardize vars to see if this helps with left censoring
	foreach var of varlist auth authindep authchain{
		*use built in egen fn for this (mean zero, sd 1)
		bys zip: egen std`var' = std(`var'), mean(0) sd(1)
	}
	compress
}

mkf csdidsimple float(b se pval ll ul) str25(outcome subsample)
mkf csdidevent float(b se pval ll ul) int(ev_yr) str25(outcome subsample)
mkf csdideventavg float(avgb avgse avgpval avgll avgul) int(times) str25(time outcome subsample)
mkf csdidgroup float(b se pval ll ul) str25(gr_yr outcome subsample)

cwf zip_sq
cd `out_dir'
csdid wicredhk, ivar(zip) time(fiscalyear) gvar(ev_year) notyet saverif(csdid_zip_wicredhk) replace cluster(st_fips)
estat simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("wicredhk") ("all")
estat event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("wicredhk") ("all")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("wicredhk") ("all")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("wicredhk") ("all")
}

estat group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("wicredhk") ("all")
}

cwf zip_sq
cd `out_dir'
csdid wicbalredhk, ivar(zip) time(fiscalyear) gvar(ev_year) notyet saverif(csdid_zip_wicbalredhk) replace cluster(st_fips)
estat simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("wicredhk") ("bal")
estat event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("wicredhk") ("bal")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("wicredhk") ("bal")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("wicredhk") ("bal")
}

estat group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("wicredhk") ("bal")
}

cwf zip_sq
frame put _all if lowinc == 1, into(lowinc)
cwf lowinc

cd `out_dir'
csdid wicredhk, ivar(zip) time(fiscalyear) gvar(ev_year) notyet saverif(csdid_zip_wicredhklowinc) replace cluster(st_fips)
estat simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("wicredhk") ("lowinc")
estat event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("wicredhk") ("lowinc")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("wicredhk") ("lowinc")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("wicredhk") ("lowinc")
}
estat group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("wicredhk") ("lowinc")
}

cwf zip_sq
frame put _all if highpov == 1, into(highpov)
cwf highpov
frame drop lowinc

cd `out_dir'
csdid wicredhk, ivar(zip) time(fiscalyear) gvar(ev_year) notyet saverif(csdid_zip_wicredhkhighpov) replace cluster(st_fips)
estat simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("wicredhk") ("highpov")
estat event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("wicredhk") ("highpov")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("wicredhk") ("highpov")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("wicredhk") ("highpov")
}
estat group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("wicredhk") ("highpov")
}

frame csdideventavg{
	expand times, generate(new)
	bys time outcome subsample (new): gen ev_yr = sum(new)
	replace ev_yr = ev_yr - 4 if time == "pre"
	drop time new times
}

cwf csdidevent
frlink 1:1 ev_yr outcome subsample, frame(csdideventavg)
frget avgb avgse avgpval avgll avgul, from(csdideventavg)
drop csdideventavg

frame drop csdideventavg

cwf csdidsimple
frame drop highpov
save `out_dir'/csdid_zip_simple.dta, replace
cwf csdidevent
save `out_dir'/csdid_zip_event.dta, replace  
cwf csdidgroup
save `out_dir'/csdid_zip_group.dta, replace  
log close 
